In [1]:
import pandas as pd
# Cap the number of DataFrame rows pandas prints when a frame is displayed.
pd.options.display.max_rows = 15
In [2]:
from zipline.pipeline import Pipeline, engine_from_files
from zipline.utils.tradingcalendar import trading_days
# Date range (UTC) passed to every run_pipeline() call in this notebook.
start_date = pd.Timestamp("2014-01-02", tz="UTC")
end_date = pd.Timestamp("2014-06-30", tz="UTC")

# Build the pipeline engine from the on-disk daily bars, adjustments and
# assets databases, using the imported trading-day calendar.
engine = engine_from_files(
    "data/equity_daily_bars.bcolz/",
    "data/adjustments.db",
    "data/assets.db",
    trading_days,
    warmup_assets=True,
)
In [3]:
from zipline.pipeline.data import USEquityPricing as USEP
from zipline.pipeline.factors import SimpleMovingAverage
# Factors are computations that produce numerical-valued outputs.
# sma30 and sma90 are Factors: moving averages of the close over
# 30- and 90-length windows.
sma30 = SimpleMovingAverage(inputs=[USEP.close], window_length=30)
sma90 = SimpleMovingAverage(inputs=[USEP.close], window_length=90)

# Filters are computations that produce boolean-valued outputs.
# Comparing two Factors yields a Filter.
screen = sma30 > sma90

# Expose both averages as columns and use the Filter as the pipeline screen.
example0 = Pipeline(columns=dict(sma30=sma30, sma90=sma90), screen=screen)
example0
Out[3]:
In [4]:
# Display example0's graph, requesting the 'svg' format.
example0.show_graph('svg')
Out[4]:
In [5]:
# Evaluate example0 between start_date and end_date and keep the result
# so it can be displayed as the cell output.
results0 = engine.run_pipeline(example0, start_date, end_date)
results0
Out[5]:
In [6]:
from zipline.pipeline.factors import VWAP
# VWAP Factors over 30- and 90-length windows.
vwap30 = VWAP(window_length=30)
vwap90 = VWAP(window_length=90)

# Arithmetic operations between Factors produce new Factors.
vwap_pct_change = (vwap30 - vwap90) / vwap30

# Any Factor instance has a rank() method. It produces a new Factor
# containing the numerical rank of each asset after sorting the
# underlying Factor values.
vwap_pct_change_rank = vwap_pct_change.rank(ascending=False)

# Any Factor instance also has a top() method. It produces a Filter
# representing the top N assets sorted by the underlying Factor values.
top200 = vwap_pct_change.top(200)

example1 = Pipeline(
    columns=dict(rank=vwap_pct_change_rank, pct_change=vwap_pct_change),
    screen=top200,
)
In [7]:
# Display example1's graph using show_graph()'s default format.
example1.show_graph()
Out[7]:
In [8]:
# Evaluate example1 between start_date and end_date; the result is the
# cell output.
engine.run_pipeline(example1, start_date, end_date)
Out[8]:
In [11]:
import numpy as np
from zipline.pipeline import CustomFactor
class MaxDrawdown(CustomFactor):
    """
    Factor computing the maximum drawdown
    an asset has taken in the last N days.

    For each asset, the largest absolute drop from a running peak to a
    later close is located. That drop is reported as a fraction of the
    close at the bottom of the drop (note: not as a fraction of the peak).
    """
    inputs = [USEP.close]

    def compute(self, today, assets, out, closes):
        """
        Write one drawdown value per asset into ``out``.

        Parameters
        ----------
        today, assets
            Not used by this computation.
        out : 1-D array
            Output buffer; ``out[i]`` receives the value for column ``i``
            of ``closes``.
        closes : 2-D float array
            Close prices indexed ``[day, asset]``; may contain NaN.
        """
        # Highest close seen so far in each column. fmax ignores NaNs, so a
        # missing bar does not reset the running peak.
        running_peak = np.fmax.accumulate(closes, axis=0)

        # The difference between each day and the max of all
        # earlier days in the period.
        drawdowns = running_peak - closes

        # Days with no close must never be selected as the drawdown end.
        # (-np.inf is used because np.NINF no longer exists in NumPy 2.)
        drawdowns[np.isnan(drawdowns)] = -np.inf
        drawdown_ends = np.nanargmax(drawdowns, axis=0)

        # Pick, for every column at once, the close at the drawdown end and
        # the running peak on that same day. The running peak at row `end`
        # equals the NaN-ignoring max of closes[:end + 1], so this replaces
        # the per-asset Python loop without changing the result.
        columns = np.arange(closes.shape[1])
        trough = closes[drawdown_ends, columns]
        peak = running_peak[drawdown_ends, columns]
        out[:] = (peak - trough) / trough
maxdd_90 = MaxDrawdown(window_length=90)

# Filter selecting the bottom 200 assets by 90-length max drawdown; it is
# used both as the rank mask and as the pipeline screen.
bottom200_by_drawdown = maxdd_90.bottom(200)

# rank() takes an optional `mask` keyword, which can be passed a Filter
# to signify "compute rank() only for assets for which the Filter
# returned True".
masked_rank = vwap_pct_change.rank(mask=bottom200_by_drawdown)

example2 = Pipeline(
    columns={'masked_rank': masked_rank},
    screen=bottom200_by_drawdown,
)
In [12]:
# Display example2's graph, requesting the 'svg' format.
example2.show_graph('svg')
Out[12]:
In [13]:
# Evaluate example2 (the masked-rank pipeline) between start_date and
# end_date; the result is the cell output.
engine.run_pipeline(example2, start_date, end_date)
Out[13]:
In [ ]: